/* Table 2 Columns 2 to 5: wage and employment regressions */
libname edu '!userprofile\\Dropbox\Education\Replication\Data';

* get data;
proc sql;
  create table wage_enrolment as
  select a.*, b.log_bachelor, exp(b.log_bachelor_male)/exp(b.log_bachelor_female) as mf_ratio, b.year_ending
  from edu.wagesoc as a, edu.enrolment as b
  where 1 <= a.year - b.year_ending <= 7 and a.major = b.majorcode;
quit;

proc sql;
  create table regression2 as
  select distinct annual_wage, annual_wage_10, delta_tot_emp, delta_tot_emp_10, year, major
  from edu.wagesoc;
quit;

proc sql;
  create table regression2 as
  select distinct a.*, b.log_mktcape_major, b.log_btme_major, b.log_agee_major, b.major
  from regression2 as a, edu.skew as b
  where a.year = b.year + 1 and a.major = b.major;
quit;

proc sql;
  create table enrolment_skew_return2 as
  select a.*, b.ravenskew1e_major
  from regression2 as a, edu.skew as b
  where 3 <= a.year - b.year <= 7 and a.major = b.major and a.major = b.major and a.major ~= .;
quit;

proc sql;
  create table wage_enrolment1 as
  select distinct a.year, a.major, b.log_bachelor, b.mf_ratio
  from regression2 as a, wage_enrolment as b
  where a.year = b.year and 1 <= a.year - b.year_ending <= 2 and a.major = b.major;
quit;

* calculate the average;
proc sort data = enrolment_skew_return2; by year major major; run;

proc means data = enrolment_skew_return2 noprint;
  by year major major;
  var ravenskew1e_major;
  output out = return2 mean = ravenskew1e_major_avg2;
run;

proc sort data = wage_enrolment1; by year major; run;

proc means data = wage_enrolment1 noprint;
  by year major;
  var log_bachelor mf_ratio;
  output out = enrolment1 mean = log_bachelor_avg1 mf_ratio_avg1;
run;

* require all lags to be present;
data return2; set return2; if _FREQ_ < 5 then delete; run;

* create a dataset for regressions;
proc sql;
  create table regression2 as
  select a.*, b.ravenskew1e_major_avg2
  from regression2 as a, return2 as b
  where a.year = b.year and a.major = b.major and a.major = b.major and a.major ~= .;
quit;

proc sql;
  create table regression2 as
  select a.*, b.log_bachelor_avg1, b.mf_ratio_avg1
  from regression2 as a, enrolment1 as b
  where a.year = b.year and a.major = b.major;
quit;

* add time-series measures;
proc sql;
  create table temp as
  select distinct a.*, b.tsmean5_major as tsmean_major_avg2, b.tsvol5_major as tsvol_major_avg2
  from regression2 as a left join edu.skew as b
  on a.year = b.year+3 and a.major = b.major;
quit;

data regression_avg2; set temp; run;

/* merge with wage data (average industry entry-level wage and average major entry-level wage) */
data wage (keep = year major annual_wage);
  set edu.wagesoc;
  if year < 1999 then delete;
run;

data wage2 (keep = year major annual_wage);
  set edu.compustat_wage;
  if year >= 1999 then delete;
  annual_wage = annual_wagee_major;
run;

proc append base = wage2 data = wage; run;

proc sql;
  create table regression_avg2 as
  select a.*, b.annual_wage as annual_wage_avg2, b.year
  from regression_avg2 as a left join wage2 as b
  on a.major = b.major and 
  a.year - b.year = 3;
quit;

* year fixed effects, major fixed effects, cluster std error by year;
data regression_avg2; 
  set regression_avg2;
  if major = 12 or major = 21 then delete;
run;

proc standard data = regression_avg2 out = regression_avg3 mean = 0 std = 1; run;

data regression_avg2; set regression_avg2; num = _N_; run;

data regression_avg3 (drop = year major annual_wage annual_wage_10 delta_tot_emp delta_tot_emp_10); set regression_avg3; num = _N_; run;

proc sql;
  create table regression_avg2 as
  select a.*, b.year, b.major, b.annual_wage, b.annual_wage_10, b.delta_tot_emp, b.delta_tot_emp_10
  from regression_avg3 as a, regression_avg2 as b
  where a.num = b.num;
quit;

proc surveyreg data = regression_avg2;
  class
  year 
  major
  ;
 cluster year;

  model annual_wage =
  ravenskew1e_major_avg2
  tsmean_major_avg2 
  tsvol_major_avg2


  log_bachelor_avg1
  mf_ratio_avg1
  annual_wage_avg2

  log_mktcape_major 
  log_btme_major 
  log_agee_major
  year 
  major
  /solution adjrsq;
run;

proc surveyreg data = regression_avg2;
  class
  year 
  major
  ;
 cluster year;

  model delta_tot_emp =
  ravenskew1e_major_avg2
  tsmean_major_avg2 
  tsvol_major_avg2


  log_bachelor_avg1
  mf_ratio_avg1
  annual_wage_avg2

  log_mktcape_major 
  log_btme_major 
  log_agee_major
  year 
  major
  /solution adjrsq;
run;
